# How Does Improvement in Commuting Affect Employees? Evidence from a Natural Experiment
# Version: 20211014

## Yao Lu (luyao@sem.tsinghua.edu.cn),        Tsinghua University
## Xinzheng Shi (shixzh@sem.tsinghua.edu.cn), Tsinghua University
## Jagadeesh Sivadasan (jagadees@umich.edu),  University of Michigan
## Zhufeng Xu (zhufeng@zhufengxu.com),        Central University of Finance and Economics

#### Figure 2: Trends in Differentials for Log (BonusT)
grapg_trend_quarter_add_zero <- function(fe_data, level = 0.95){
  coef <- c(coef(fe_data)[1:7], 0, coef(fe_data)[8:15])
  low_int <- c(confint(fe_data, level = level)[, 1][1:7], 0, confint(fe_data, level = level)[, 1][8:15])
  upp_int <- c(confint(fe_data, level = level)[, 2][1:7], 0, confint(fe_data, level = level)[, 2][8:15])
  coef_levels <- c("2013Q1", "2013Q2", "2013Q3", "2013Q4", "2014Q1", "2014Q2", "2014Q3", "2014Q4", 
                   "2015Q1", "2015Q2", "2015Q3", "2015Q4", "2016Q1", "2016Q2", "2016Q3", "2016Q4")
  
  ext_lm <- data.frame(coef = coef,
                       time = 1:16,
                       low_int = low_int,
                       upp_int = upp_int)
  
  ggplot(data = ext_lm, aes(y = coef, x = time)) + geom_line(size = 1.2) + geom_point(size = 2) +
    geom_errorbar(data = ext_lm, aes(ymin = low_int, ymax = upp_int), col = "black", alpha = 0.8) +
    scale_x_continuous(breaks= seq(1, 16, 1), labels= coef_levels) +
    xlab("Date") + ylab(TeX("$Coefficients \\, on \\, Quarter \\times NearSubway$")) +
    geom_vline(aes(xintercept = 8.5), colour = "black", linetype = "dashed", size = 1.4) +
    geom_hline(aes(yintercept = 0  ), colour = "black", linetype = "dashed", size = 1.4) +
    theme_light() +
    theme(axis.text.x  = element_text(angle = 30, hjust = 1)) +
    theme(text=element_text(family = "Times New Roman", size = 12, lineheight = 2))
}

## Figure 3: Time of Arrival and Leaving
Plot_TimeofArrivalAndLeaving <- function(plot_data = Swipe, data = midnightmin_cutoff, type = "Arrival") {
  
  if (type == "Arrival") {
    cutoff1   <- 405
    cutoff2   <- 855
    Data      <- filter(data, midnightmin >= cutoff1 & midnightmin <= cutoff2)
    Data$hour <- ifelse(nchar(Data$hour) == 1, paste0("0", Data$hour), Data$hour)
    Data$lab  <- paste0(Data$hour, ":", Data$min)
    
    Xintercept <- 540
    Xlab       <- "Time of Arrival"
    P          <- ggplot(data = filter(plot_data, TimeofArrival >= cutoff1 & TimeofArrival <= cutoff2),
                aes(x = TimeofArrival, group = `Subway opened`, linetype = `Subway opened`))
  }
  if (type == "Leaving") {
    cutoff1   <- 645
    cutoff2   <- 1275
    Data      <- filter(data, midnightmin >= cutoff1 & midnightmin <= cutoff2)
    Data$hour <- ifelse(nchar(Data$hour) == 1, paste0("0", Data$hour), Data$hour)
    Data$lab  <- paste0(Data$hour, ":", Data$min)
    
    Xintercept <- 1050
    Xlab       <- "Time of Leaving"
    P          <- ggplot(data = filter(plot_data, TimeofLeaving >= cutoff1 & TimeofLeaving <= cutoff2),
                         aes(x = TimeofLeaving, group = `Subway opened`, linetype = `Subway opened`))
    
  }
  
  P + geom_density(alpha = 0.4) +
    scale_linetype_manual(name = "Subway opened", values = c(1, 3)) + 
    geom_vline(xintercept = Xintercept, colour = "black", size = 1.4, linetype = "dashed") +
    xlab(Xlab) + ylab("Density") +
    scale_x_continuous(limits = c(cutoff1, cutoff2), breaks = Data$midnightmin, labels = Data$lab) +
    theme_light() +
    theme(legend.position=c(0.85, 0.80)) +
    theme(axis.text.x  = element_text(angle = 30, hjust = 1)) +
    theme(text=element_text(family = "Times New Roman", size = 12, lineheight = 2))
}


#### Table 1: Summary Statistics
SummaryStatistics <- function(Data = Main, CrossSection = CrossSection) {
  Digits = 2
  
  RowNames <- c("NearSubway Dummy",   "TimeSaved (one-way, in minutes, affected workers only)", "Distance (kilometer)",
                "Bonus (RMB)",        "Log(Bonus$^\\text{T}$)",                                 "Total Income Net of Bonus (RMB)",
                "Total Income (RMB)", "Log(Total Income Net of Bonus)",                         "Log(Total Income$^\\text{T}$)",
                "Age (year)",         "Male Dummy",                                             "Experience (week)",
                "Tenure (week)",      "Education (year)",                                       "Number of Children",
                "Married Dummy",      "Manager Dummy",                                          "New Employee Dummy")
  
  ## Columns (1) - (4)
  Basic_Row_NearSubway <- SummaryStatistics_Basic(Data$NearSubway)
  Basic_Row_TimeSaved  <- SummaryStatistics_Basic(filter(Data, TimeSaved > 0)$TimeSaved / 60)
  Basic_Row_Distance   <- SummaryStatistics_Basic(Data$Distance)
  
  Basic_Row_Bonus         <- SummaryStatistics_Basic(Data$Bonus)
  Basic_Row_LogBonusT     <- SummaryStatistics_Basic(Data$LogBonusT)
  Basic_Row_TiNetBonus    <- SummaryStatistics_Basic(Data$TiNetBonus)
  Basic_Row_Ti            <- SummaryStatistics_Basic(Data$Ti)
  Basic_Row_LogTiNetBonus <- SummaryStatistics_Basic(Data$LogTiNetBonus)
  Basic_Row_LogTiT        <- SummaryStatistics_Basic(Data$LogTiT)
    
  Basic_Row_Age          <- SummaryStatistics_Basic(Data$Age)
  Basic_Row_Male         <- SummaryStatistics_Basic(Data$Male)
  Basic_Row_Experience   <- SummaryStatistics_Basic(Data$Experience)
  Basic_Row_Tenure       <- SummaryStatistics_Basic(Data$Tenure)
  Basic_Row_Education    <- SummaryStatistics_Basic(Data$Education)
  Basic_Row_NChild       <- SummaryStatistics_Basic(Data$NChild)
  Basic_Row_Married      <- SummaryStatistics_Basic(Data$Married)
  Basic_Row_ManagerNov14 <- SummaryStatistics_Basic(Data$ManagerNov14)
  Basic_Row_NewEmployeeD <- SummaryStatistics_Basic(Data$NewEmployeeD)
  
  Basic <- t(bind_rows(Basic_Row_NearSubway, Basic_Row_TimeSaved,     Basic_Row_Distance,
                       Basic_Row_Bonus,      Basic_Row_LogBonusT,     Basic_Row_TiNetBonus, 
                       Basic_Row_Ti,         Basic_Row_LogTiNetBonus, Basic_Row_LogTiT,
                       Basic_Row_Age,        Basic_Row_Male,          Basic_Row_Experience,
                       Basic_Row_Tenure,     Basic_Row_Education,     Basic_Row_NChild,
                       Basic_Row_Married,    Basic_Row_ManagerNov14,  Basic_Row_NewEmployeeD))
  
  ## Column (5) - (8)
  BT <- xBalance(NearSubway ~ TimeSaved + Distance + Bonus + LogBonusT + TiNetBonus + Ti + LogTiNetBonus + LogTiT +
                   Age + Male + Experience + Tenure + Education + NChild + Married + Manager + NewEmployeeD,
                 data = CrossSection, report = c("all"))
  
  Affected_Mean   <- round(c(1, BT$results[, "NearSubway=1", 1][1:17]), digits = Digits)
  Unaffected_mean <- round(c(0, BT$results[, "NearSubway=0", 1][1:17]), digits = Digits)
  Diff            <- round(c(1, BT$results[, "adj.diff",     1][1:17]), digits = Digits)
  Pvalue          <- round(c(0, BT$results[, "p",            1][1:17]), digits = Digits)
  
  CrossSection_BT <- data.frame(Affected_Mean   = Affected_Mean,
                                Unaffected_mean = Unaffected_mean,
                                Diff            = Diff,
                                Pvalue          = Pvalue)
  ## Columns (9) - (12)
  Matched_BT <- xBalance(NearSubway ~ TimeSaved + Distance + Bonus + LogBonusT + TiNetBonus + Ti + LogTiNetBonus + LogTiT +
                   Age + Male + Experience + Tenure + Education + NChild + Married + Manager + NewEmployeeD,
                 data = filter(CrossSection, MatchedFlag == 1), report = c("all"))
  
  Matched_Affected_Mean   <- round(c(1, Matched_BT$results[, "NearSubway=1", 1][1:17]), digits = Digits)
  Matched_Unaffected_mean <- round(c(0, Matched_BT$results[, "NearSubway=0", 1][1:17]), digits = Digits)
  Matched_Diff            <- round(c(1, Matched_BT$results[, "adj.diff",     1][1:17]), digits = Digits)
  Matched_Pvalue          <- round(c(0, Matched_BT$results[, "p",            1][1:17]), digits = Digits)
  
  Matched_CrossSection_BT <- data.frame(Matched_Affected_Mean   = Matched_Affected_Mean,
                                        Matched_Unaffected_mean = Matched_Unaffected_mean,
                                        Matched_Diff            = Matched_Diff,
                                        Matched_Pvalue          = Matched_Pvalue)
  ## 
  BalanceTest     <- bind_cols(CrossSection_BT, Matched_CrossSection_BT)
  
  ## Column (13)
  MatchSummary         <- summary(matchit(NearSubway ~ Distance + Age + Male + Experience + Tenure + Education + 
                                            NChild + Married + Manager + NewEmployeeD + Company,
                                  method = "optimal",  data = select(CrossSection, -TimeSaved)))
  
  BalanceImproved      <- MatchSummary$reduction$`Mean Diff.`
  BalanceImproved      <- c(rep(NA, 2), BalanceImproved[2], rep(NA, 6), BalanceImproved[3:11])
  BalanceImproved      <- round(BalanceImproved, digits = Digits)
  
  ##
  BalanceTest          <- t(bind_cols(BalanceTest, BalanceImproved = BalanceImproved))
  
  return(list(RowNames        = RowNames, 
              Basic           = Basic,
              BalanceTest     = BalanceTest))
}

SummaryStatistics_Basic <- function(...) {
  Digits <- 2
  
  OBS    <- sum(!is.na(...))
  MEAN   <- round(mean(  ..., na.rm = TRUE), digits = Digits)
  MEDIAN <- round(median(..., na.rm = TRUE), digits = Digits)
  SD     <- round(sd(    ..., na.rm = TRUE), digits = Digits)
  
  BASIC <- data.frame(obs    = OBS, 
                      mean   = MEAN,
                      median = MEDIAN,
                      sd     = SD)
  return(BASIC)
}


#### Table 2: Impact of the Opening of Subway Line 15 on Employee Compensation
GenRegTable_Enhanced <- function(fit, depvar, type = "log1", Data = Main, WhichBeta = 1) {
  beta  <- as.numeric(fit$coefficients)[WhichBeta]
  clse  <- as.numeric(fit$cse)[WhichBeta]
  cpval <- as.numeric(fit$cpval)[WhichBeta]
  adjR2 <- as.numeric(summary(fit)$adj.r.squared)
  obs   <- as.numeric(summary(fit)$N)
  
  if (any(grepl(pattern = "Self-Reported", fit$terms[[2]]))) {
    N_affected <- as.numeric(length(unique(filter(Data, `Self-Reported NearSubway` == 1)$ID)))
    N_control  <- as.numeric(length(unique(filter(Data, `Self-Reported NearSubway` == 0)$ID)))
  } else {
    N_affected <- as.numeric(length(unique(filter(Data, NearSubway == 1)$ID)))
    N_control  <- as.numeric(length(unique(filter(Data, NearSubway == 0)$ID)))
  }

  if (type == "log") {
    M_depvar  <- mean(log(as.matrix(Data[, depvar])), na.rm = TRUE)
    SD_depvar <- sd(  log(as.matrix(Data[, depvar])), na.rm = TRUE)
  }
  if (type == "log1") {
    M_depvar  <- mean(as.matrix(log(Data[, depvar] + 1)), na.rm = TRUE)
    SD_depvar <- sd(  as.matrix(log(Data[, depvar] + 1)), na.rm = TRUE)
  }
  if (type == "level") {
    M_depvar  <- mean(as.matrix(Data[, depvar]), na.rm = TRUE)
    SD_depvar <- sd(  as.matrix(Data[, depvar]), na.rm = TRUE)
  }
  
  M_NS  <- mean(as.matrix(Data[, "NearSubway"]), na.rm = TRUE)
  SD_NS <- sd(  as.matrix(Data[, "NearSubway"]), na.rm = TRUE)
  
  if (depvar == "LogTiT") {
    M_depvar_l <- mean(as.matrix(Data[, "Ti"]), na.rm = TRUE)
    ChangeL <- round((M_depvar_l + 6904.22), 3) * round(beta, 3)
  }
  if (depvar == "LogTiNetBonus") {
    M_depvar_l <- mean(as.matrix(Data[, "TiNetBonus"]), na.rm = TRUE)
    ChangeL <-  round(M_depvar_l, 3) * round(beta, 3)
  }
  if (depvar == "Bonus") {
    M_depvar_l <- mean(as.matrix(Data[, "Bonus"]), na.rm = TRUE)
    ChangeL <- beta
  }
  if (depvar == "LogBonusT") {
    M_depvar_l <- mean(as.matrix(Data[, "Bonus"]), na.rm = TRUE)
    ChangeL <- round((M_depvar_l + 6904.22), 3) * round(beta, 3)
  }
  
  EffectOfSD <- round(beta, 3) / round(SD_depvar, 3) * 100
  
  ReturnTable <- data.frame(beta = beta, clse = clse, cpval = cpval, adjR2 = adjR2, obs = obs,
                            N_affected = N_affected, N_control = N_control,
                            M_depvar = M_depvar, SD_depvar = SD_depvar,
                            M_NS = M_NS, SD_NS = SD_NS, M_depvar_l = M_depvar_l,
                            ChangeL = ChangeL, EffectOfSD = EffectOfSD,
                            IndividualFE = "Yes", CompanyYearMonthFE = "Yes") %>%
    mutate(IndividualFE       = as.character(IndividualFE),
           CompanyYearMonthFE = as.character(CompanyYearMonthFE)) %>%
    return()
}



#### Table 3: Effect of the Subway on Employees' Attendance and Time at Work
GenRegTable <- function(fit, depvar, type = "log1", Data = Main, WhichBeta = 1) {
  beta  <- as.numeric(fit$coefficients)[WhichBeta]
  clse  <- as.numeric(fit$cse)[WhichBeta]
  cpval <- as.numeric(fit$cpval)[WhichBeta]
  adjR2 <- as.numeric(summary(fit)$adj.r.squared)
  obs   <- as.numeric(summary(fit)$N)
  
  if (any(grepl(pattern = "Self-Reported", fit$terms[[2]]))) {
    N_affected <- as.numeric(length(unique(filter(Data, `Self-Reported NearSubway` == 1)$ID)))
    N_control  <- as.numeric(length(unique(filter(Data, `Self-Reported NearSubway` == 0)$ID)))
  } else {
    N_affected <- as.numeric(length(unique(filter(Data, NearSubway == 1)$ID)))
    N_control  <- as.numeric(length(unique(filter(Data, NearSubway == 0)$ID)))
  }
  
  if (type == "log") {
    M_depvar  <- mean(log(as.matrix(Data[, depvar])), na.rm = TRUE)
    SD_depvar <- sd(  log(as.matrix(Data[, depvar])), na.rm = TRUE)
  }
  if (type == "log1") {
    M_depvar  <- mean(as.matrix(log(Data[, depvar] + 1)), na.rm = TRUE)
    SD_depvar <- sd(  as.matrix(log(Data[, depvar] + 1)), na.rm = TRUE)
  }
  if (type == "level") {
    M_depvar  <- mean(as.matrix(Data[, depvar]), na.rm = TRUE)
    SD_depvar <- sd(  as.matrix(Data[, depvar]), na.rm = TRUE)
  }
  
  M_NS  <- mean(as.matrix(Data[, "NearSubway"]), na.rm = TRUE)
  SD_NS <- sd(  as.matrix(Data[, "NearSubway"]), na.rm = TRUE)
  
  ReturnTable <- data.frame(beta = beta, clse = clse, cpval = cpval, adjR2 = adjR2, obs = obs,
                            N_affected = N_affected, N_control = N_control,
                            M_depvar = M_depvar, SD_depvar = SD_depvar,
                            M_NS = M_NS, SD_NS = SD_NS,
                            IndividualFE = "Yes", CompanyYearMonthFE = "Yes") %>%
    mutate(IndividualFE       = as.character(IndividualFE),
           CompanyYearMonthFE = as.character(CompanyYearMonthFE)) %>%
    return()
}








